R/sae overall function.R

#' @title ELLsae
#' @description Beschreibung der Funktion
#'
#' @param model a model that is specified for the relationship betwenn
#' the response varibale and the regressors. Model must be a linear model that can be processed by \code{lm()}
#' @param surveydata Smaller surveydata with additional response variable of interest.
#' Will be used to estimate the linear model
#' @param censusdata The dataset in which a certain variable is supposed to be imputed
#' @param location_survey Name of location variable or vector for the survey data which is used for
#' error correction and the location means (if \code{mResponse} is specified)
#' @param mResponse Additional parameters for the regression based on location means
#' calculated from the census data to account for the lack of information in a small survey
#' @param n_boot Number of bootstrap samples used for the estimation, default is 50
#' @param welfare.function Additionally a welfare function for the response can be specified
#' @export yes
#' @return Was die Funktion ausspuckt.
#' @references
#' @seealso
#' @keywords
#' @examples



ELLsae <- function(model, surveydata, censusdata, location_survey, mResponse, n_boot = 50, welfare.function){


  # --------------------------------------------------------------------------------- #
  # ------- check whether all parameters are specified, if not try to reformat ------ #
  # --------------------------------------------------------------------------------- #

  ##### check whether model is specified correctly and try to correct misspecification
  if(missing(model)){stop("A model has to be specified")}
  if(class(model) != "formula"){
    model <- try(as.formula(model), silent = T)
    if (class(model) == "try-error"){
      stop("model must either be provided as a formula or as a string.
           See ?formula for help")
    }
  }
  ##### check whether surveydata is specified correctly and try to correct
  if(missing(surveydata)) stop("Data frame with the surveydata is missing")
  if(class(surveydata) != "data.frame"){
    surveydata <- try(as.data.frame(surveydata), silent = T)
    if (class(surveydata) == "try-error"){
      stop("survey data should be provided as data.frame or something similar.
           ELLsae was not able to convert your input into a data.frame")
    }
  }
  n_obs_survey <- nrow(surveydata)

  ##### check whether censusdata is specified correctly and try to correct
  if(missing(censusdata)) stop("Data frame with the censusdata is missing")
  if(class(censusdata) != "data.frame"){ # alternativ if(!is.data.frame(censusdata))?
    censusdata <- try(as.data.frame(censusdata))
    if (class(censusdata) == "try-error"){
      stop("census data should be provided as data.frame or something similar.
           ELLsae was not able to convert your input into a data.frame")
    }
  }
  n_obs_census <- nrow(censusdata)

  ##### check whether the locations are specified correctly and try to correct
  if(missing(location_survey)) stop("you have to provide either 1) a vector of locations of length corresponding to the number of observations in the survey data or 2) a string with the name of a variable in the surveydata that provides the locations of individual observations")
  if(!is.vector(location_survey)){
    location_survey <- try(as.vector(location_survey))
    if(class(location_survey) == "try-error"){stop("you have to provide either 1) a vector of locations of length corresponding to the number of observations in the survey data or 2) a string with the name of a variable in the surveydata that provides the locations of individual observations")
    }
  }
  # if locations are specified as string with variable name, convert into vector
  if (length(location_survey) == 1 & is.character(location_survey)) {
    if (any(location_survey == names(surveydata))){
      location_survey <- as.vector(eval(parse(text = paste("surveydata$", location_survey, sep = ""))))
    }
    else {
      stop("String that was specified as variable name for the location is not the name of one of the variables in the survey data set.")
    }
  }
  # make sure that location vector has correct length
  if(length(location_survey) != n_obs_survey){
    stop("Number of locations provided does not correspond to the number of observations in the survey data set")
  }


  # # the following functions checks if all the arguments of the overall
  # # function are correctly specified
  # check.fun.arguments(model, surveydata, censusdata, location_survey,
  #                     mResponse, n_boot, welfare.function)



  # convert locations of surveydata into simple integers. Location of census is ignored
  location <- location.simplifier(location = location_survey)

  ### den Schritt braucht man eigentlich nur, wenn die Obs nicht nach Location sortiert sind.
  unique_location <- unique(location)

  n_locations <- length(unique_location)


  # The following function computes means from the census for the regression of the survey dataset
  # and adds them to the surveydataset to be included in the later regression
  if(!missing(mResponse)){
    list_model <- mean.for.regression(mResponse, censusdata, surveydata, model, location_survey)
    surveydata <- as.data.frame(list_model[[2]])
    model <- as.formula(list_model[[1]])
    rm(list_model)
  }


  ### ggf. alle Beobachtungen nach Location sortieren? Das ermöglicht den
  # komplizierten Residualbootstrap effizient




  inference_survey <- sae.inference.survey(model = model,
                                           surveydata = surveydata,
                                           location = location,
                                           unique_location = unique_location)

  if(!missing(welfare.function)){
    sae_inference_census <- sae.inference.census(model = model,
                                                 censusdata = censusdata,
                                                 location = location,
                                                 n_obs_census = n_obs_census,
                                                 n_obs_survey = n_obs_survey,
                                                 n_locations = n_locations,
                                                 n_boot = n_boot,
                                                 model_fit_survey = inference_survey$model_fit_surv,
                                                 welfare.function = welfare.function,
                                                 inference_survey = inference_survey)

  } else {
    sae_inference_census <- sae.inference.census(model = model,
                                                 censusdata = censusdata,
                                                 location = location,
                                                 n_obs_census = n_obs_census,
                                                 n_obs_survey = n_obs_survey,
                                                 n_locations = n_locations,
                                                 n_boot = n_boot,
                                                 model_fit_survey = inference_survey$model_fit_surv,
                                                 inference_survey = inference_survey)
  }



}
nikosbosse/SAE documentation built on May 12, 2019, 4:37 a.m.